
* Project root directory, taken from the first positional argument of the do-file.
global root_dir = "`1'"

* Load the project configuration.
* NOTE(review): presumably this defines the *_dir globals used below
* (log_dir, dataset_dir, ...) -- confirm against config.do.
include "$root_dir/code/config/config.do"


* Open the main log under the name dat. The path is quoted so that a log
* directory containing spaces is still parsed as a single filename.
* cap noi: a failure to open the log is shown but does not stop the script.
cap noi log using "${log_dir}/build_mp_comparison.log", replace name(dat)

* Normalise the optional positional arguments 2-5 into the globals arg1-arg4.
* The placeholder "___EMPTY___" stands for "argument not supplied" and is
* mapped to the empty string.
forvalues k = 1/4 {
    local pos = `k' + 1
    global arg`k' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* Each non-empty argument overrides the corresponding global; an empty
* argument leaves whatever value the global already has untouched.
if !missing("${arg1}") {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if !missing("${arg2}") {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if !missing("${arg3}") {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if !missing("${arg4}") {
    global wtype "${arg4}"
}
* Echo the value of wtype (shown whether or not arg4 was supplied).
display "${wtype}"
capture noi {

* v8
* Build data to match and compare our US machinery patents with the
* Mann-Puettmann sample

* --------------------  *
* appln-cipc6 dataset
* --------------------  *

* Prepare US machinery patents

* Start from the machinery patent list and attach, for every application, the
* year variables, publication number and publication authority. Applications
* without publication information are kept (unmatched(master)).
use ${dataset_dir}/patent_list/pats_tfa.dta, clear
mmerge appln_id using ${commondata_dir}/patstat_2018b/publn_info.dta, ///
    unmatched(master) ukeep(*year* publn_nr publn_auth)

* Restrict to US publications, then add the application-level information,
* keeping only applications found in both files.
keep if publn_auth == "US"
mmerge appln_id using ${commondata_dir}/patstat_2018b/appln_info.dta, unmatched(none)

* Make sure we have the right intellectual property type: drop everything
* that is not labelled "PI" in value label ipr_type2, as well as applications
* whose year is coded 9999.
drop if appln_year == 9999 | ipr_type != "PI":ipr_type2
duplicates drop

* Open the "numbers" log for the match diagnostics. A log still registered
* under the name num (e.g. left open by an earlier run that aborted) is closed
* first, because log using fails when the name is already in use.
cap log close num
log using ${numb_dir}/build_mp_comparison_numbers.log, replace name(num)

* Match with Mann-Puettmann: keep only patents present in both sources
mmerge publn_nr using ${commondata_dir}/mann_puettmann/puettmann_patent.dta, unmatched(none)

* Reduce to the application id, the timing variables and the automation
* indicators (every variable whose name ends in "automat"), then report how
* many applications still appear more than once.
keep appln_id year week *automat
duplicates drop
duplicates report appln_id *automat

cap log close num

* Keep a single row per application: the one with the earliest year.
* Stata's sort breaks ties at random unless the sort key is unique, so all
* remaining variables are added to the key. Exact duplicates were dropped
* above, hence the key is unique and the surviving row is reproducible
* (ties within a year go to the smallest week, then the smallest indicator
* values).
sort appln_id year week *automat
by appln_id: keep if _n == 1

* Add auto90 / auto95 / pauto90 patent indicators from DHOZ

* Mark the Mann-Puettmann indicators as such before adding our own.
rename (automat raw_automat) (automat_puettmann raw_automat_puettmann)

* For each DHOZ patent list, flag the applications that appear in it.
* unmatched(master) keeps every application currently in memory, so the flag
* is 1 for matched applications and 0 otherwise.
local dhoz_lists auto90 auto95 pauto90
foreach tech of local dhoz_lists {
    mmerge appln_id using ${dataset_dir}/patent_list/`tech'_patents.dta, unmatched(master)
    generate `tech' = _merge == 3
}


* Apply cipc6's: attach the cipc6 codes of each application and keep only
* applications that have at least one code.
mmerge appln_id using ${dataset_dir}/patstat_orbis/cipc_codes.dta, unmatched(none)
drop _merge
sort appln_id cipc6
order cipc6, before(year)

* Application-by-cipc6 file shared by the later comparison steps.
save ${final_dir}/MP_DHOZ_common_data.dta, replace

*** Generate and compare shares per technology-code
* --------------------  *
* Techlevel dataset
* --------------------  *
log using ${numb_dir}/build_mp_comparison_numbers.log, append name(num)

* Prepare a tech-level file of class machinery & auto90/95
import delim ${classification_dir}/V6/ipc6XX_tf.csv, varnames(1) clear

* Machinery = the four listed fields within the "Mechanical engineering" sector ...
gen machinery_field = techn_sector == "Mechanical engineering" & (techn_field == "Handling" ///
    | techn_field == "Machine tools" | techn_field == "Other special machines" ///
    | techn_field == "Textile and paper machines")
gen ipc3 = substr(ipc6, 1, 3)
gen ipc4 = substr(ipc6, 1, 4)
* ... excluding classes F41 and F42 ...
replace machinery_field = 0 if ipc3 == "F41" | ipc3 == "F42"
* ... and including a handful of individual codes outside those fields.
replace machinery_field = 1 if ipc4 == "B42C"
replace machinery_field = 1 if ipc4 == "B07C"
replace machinery_field = 1 if ipc6 == "G05B19"
replace machinery_field = 1 if ipc6 == "G05B2219"
replace machinery_field = 1 if ipc6 == "B62D65"
gen ipc1 = substr(ipc6, 1, 1)
* Codes in section Y must never be counted as machinery.
assert machinery_field == 0 if ipc1 == "Y"
tab techn_field if machinery_field == 1
replace techn_field = "non-machinery" if machinery_field == 0
replace techn_field = "non-classified" if ipc1 == "Y"
drop ipc1 ipc3 ipc4

* Codes eligible for the auto90/auto95 flags: machinery codes with at least
* 100 in total. Stata treats missing values as larger than any number, so
* "total >= 100" and "share >= cutoff" are both TRUE for missing values; the
* explicit !missing() guard keeps codes with a missing total or share out of
* the percentile sample and out of the flags.
* NOTE(review): machinery and total are used exactly as in the original code;
* machinery presumably abbreviates machinery_field -- confirm the CSV has no
* column of its own called machinery.
local eligible machinery == 1 & total >= 100 & !missing(total, share_anyclassification)

* auto95 / auto90 = 1 for eligible codes whose share is at or above the 95th /
* 90th percentile of share_anyclassification among eligible codes. The cutoff
* is copied into a scalar right after _pctile so it keeps full precision and
* does not depend on r() surviving the commands in between.
tempname cutoff
foreach p in 95 90 {
    _pctile share_anyclassification if `eligible', p(`p')
    scalar `cutoff' = r(r1)
    * Show the percentile in the numbers log.
    return list
    gen auto`p' = 0
    replace auto`p' = 1 if share_anyclassification >= `cutoff' & `eligible'
}

ren ipc6 ipc6xx
tempfile ipc6xx_mach_auto
save `ipc6xx_mach_auto', replace

cap log close num

* Merge prevalence using the 6-digit XX codes classification.
* The mapping file translates each ipc6 code (renamed to cipc6, the key of the
* patent file) into its ipc6xx code.
import delimited ${classification_dir}/V6/ipc6XX_mapping.csv, varnames(1) clear
rename ipc6 cipc6

* Attach the application-by-cipc6 patent file. Patent rows whose code has no
* entry in the mapping (_merge == 2) use the cipc6 code itself as ipc6xx.
mmerge cipc6 using ${final_dir}/MP_DHOZ_common_data.dta, unmatched(u)
replace ipc6xx = cipc6 if _merge == 2
keep ipc6xx appln_id *automat_puettmann

* Attach the tech-level file built earlier (tempfile ipc6xx_mach_auto) and
* count the rows per ipc6xx code.
mmerge ipc6xx using `ipc6xx_mach_auto', unmatched(u)
bysort ipc6xx: generate _total_ipc6xx = _N

* Calculate "share_class" for Mann-Puettmann: per ipc6xx code, the sum of each
* indicator divided by the number of rows of that code.
bysort ipc6xx: egen _total_auto = total(automat_puettmann)
generate share_auto_puettmann = _total_auto / _total_ipc6xx
bysort ipc6xx: egen _total_raw_auto = total(raw_automat_puettmann)
generate share_raw_auto_puettmann = _total_raw_auto / _total_ipc6xx

* Rows that exist only in the tech-level file get missing shares.
foreach share of varlist share_auto_puettmann share_raw_auto_puettmann {
    replace `share' = . if _merge == 2
}
drop _merge automat_puettmann raw_automat_puettmann


* Calculate the share of auto90, auto95 and pauto90 patents according to our
* DHOZ classification: per ipc6xx code, the number of rows whose application
* appears in the respective patent list divided by the rows of that code.
local dhoz_lists auto90 auto95 pauto90
foreach tech of local dhoz_lists {
    mmerge appln_id using ${dataset_dir}/patent_list/`tech'_patents.dta, unmatched(master)
    generate `tech'_appln = _merge == 3
    bysort ipc6xx: egen _total_`tech' = total(`tech'_appln)
    generate share_`tech' = _total_`tech' / _total_ipc6xx
    label variable share_`tech' "Share of `tech' patents DHOZ"
    * Only the share is kept; the helper variables are not needed afterwards.
    drop `tech'_appln _total_`tech' _merge
}

* Collapse to one row per technology code: without the application id all
* rows of a code are identical, so dropping duplicates leaves one each.
drop appln_id
duplicates drop
rename (_total_ipc6xx _total_auto _total_raw_auto) ///
    (total_MP_patent total_automat_MP total_raw_automat_MP)



* Save the 6-digit technology level file
save ${final_dir}/MP_DHOZ_ipc_comparison.dta, replace

}
* Report the outcome of the captured build block. Its return code is copied
* first, because every later capture (including the log clean-up below)
* overwrites _rc.
local build_rc = _rc
if `build_rc' == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

* If the build block aborted while the numbers log was open, that log is
* still open here; close it so it is flushed and the name num is free again
* for the next run. cap: closing a log that is not open is not an error here.
cap log close num
cap log close dat